repo/checkout: Cache lookups of dirmeta objects
Author: Colin Walters <walters@verbum.org>
Fri, 14 Apr 2017 18:00:55 +0000 (14:00 -0400)
Committer: Atomic Bot <atomic-devel@projectatomic.io>
Tue, 25 Apr 2017 13:40:53 +0000 (13:40 +0000)
I was reading a strace the other day and noticed we were loading the same
`.dirmeta` object many times. Unlike the other object types, `.dirmeta` objects
don't accumulate much over time; there are only so many directory metadata types.
(Without SELinux involved it'd probably be 5-6 I'd guess offhand).

For `fedora-atomic/25/x86_64/docker-host` there are currently 34 `.dirmeta` in
the tree.

But how many times during a checkout did we load those 34 dirmeta objects?
With a quick strace:

```
$ strace -s 2048 -f -o strace.log ostree --repo=repo-build checkout -U fedora-atomic/25/x86_64/docker-host host-test-checkout
$ grep dirmeta strace.log | wc -l
7165
```

After, as you'd expect, we just loaded `34` from disk.  We do
6 system calls (`openat+fstat+fstat+read+read+close`) per dirmeta,
so we dropped a total of 42780 system calls - which is about 20% of the total
system calls made.

`perf record` tells me that we're spending ~40% of our time in the kernel during
a checkout, so reducing syscall traffic helps. Though most of that appears to be
in the VFS and XFS layers for `linkat` (which isn't surprising).

So how much did perf improve? Well, on my workstation, I get a lot of
fluctuation in timing, sometimes by 30%, so this was well within the noise. But
it's well worth speeding up checkout, and I think this optimization will shine
more as we improve performance elsewhere.

Closes: #795
Approved by: jlebon

src/libostree/ostree-repo-checkout.c
src/libostree/ostree-repo-private.h
src/libostree/ostree-repo.c

index 5b87c7eca0d229a0f484dbd698b0f17591fa8792..4de6caf9d30542e371481a36eb1026d95690e9a1 100644 (file)
@@ -799,6 +799,8 @@ ostree_repo_checkout_tree (OstreeRepo               *self,
   /* Backwards compatibility */
   options.enable_uncompressed_cache = TRUE;
 
+  g_auto(OstreeRepoMemoryCacheRef) memcache_ref;
+  _ostree_repo_memory_cache_ref_init (&memcache_ref, self);
   return checkout_tree_at (self, &options,
                            AT_FDCWD, gs_file_get_path_cached (destination),
                            source, source_info,
@@ -916,6 +918,8 @@ ostree_repo_checkout_at (OstreeRepo                        *self,
   if (!target_info)
     return FALSE;
 
+  g_auto(OstreeRepoMemoryCacheRef) memcache_ref;
+  _ostree_repo_memory_cache_ref_init (&memcache_ref, self);
   if (!checkout_tree_at (self, options,
                          destination_dfd,
                          destination_path,
index fa18947e26056622daa50f4583ee3ac68a75c2bc..74f032d1bc4729458a34566014f6b3bee86ae46c 100644 (file)
@@ -92,6 +92,9 @@ struct OstreeRepo {
   OstreeRepoTransactionStats txn_stats;
 
   GMutex cache_lock;
+  guint dirmeta_cache_refcount;
+  /* char * checksum → GVariant * for dirmeta objects, used in the checkout path */
+  GHashTable *dirmeta_cache;
 
   gboolean inited;
   gboolean writable;
@@ -126,6 +129,24 @@ typedef struct {
   char checksum[OSTREE_SHA256_STRING_LEN+1];
 } OstreeDevIno;
 
+/* A MemoryCacheRef is an in-memory cache of objects (currently just DIRMETA).  This can
+ * be used when performing an operation that traverses a repository in some way.  Currently,
+ * the primary use case is ostree_repo_checkout_at() avoiding lots of duplicate dirmeta
+ * lookups.
+ */
+typedef struct {
+  OstreeRepo *repo;
+} OstreeRepoMemoryCacheRef;
+
+
+void
+_ostree_repo_memory_cache_ref_init (OstreeRepoMemoryCacheRef *state,
+                                    OstreeRepo               *repo);
+
+void
+_ostree_repo_memory_cache_ref_destroy (OstreeRepoMemoryCacheRef *state);
+G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(OstreeRepoMemoryCacheRef, _ostree_repo_memory_cache_ref_destroy);
+
 #define OSTREE_REPO_TMPDIR_STAGING "staging-"
 #define OSTREE_REPO_TMPDIR_FETCHER "fetcher-"
 
index e5dbed25d7a47ad8a39b0d18837bfa449606aa36..051285d41e799a5e72efa5e6623504cb7b3afc1d 100644 (file)
@@ -538,6 +538,7 @@ ostree_repo_finalize (GObject *object)
   g_clear_pointer (&self->txn_refs, g_hash_table_destroy);
   g_clear_error (&self->writable_error);
   g_clear_pointer (&self->object_sizes, (GDestroyNotify) g_hash_table_unref);
+  g_clear_pointer (&self->dirmeta_cache, (GDestroyNotify) g_hash_table_unref);
   g_mutex_clear (&self->cache_lock);
   g_mutex_clear (&self->txn_stats_lock);
 
@@ -2500,6 +2501,26 @@ load_metadata_internal (OstreeRepo       *self,
 
   g_return_val_if_fail (OSTREE_OBJECT_TYPE_IS_META (objtype), FALSE);
 
+  /* Special caching for dirmeta objects, since they're commonly referenced many
+   * times.
+   */
+  const gboolean is_dirmeta_cachable =
+    (objtype == OSTREE_OBJECT_TYPE_DIR_META && out_variant && !out_stream);
+  if (is_dirmeta_cachable)
+    {
+      GMutex *lock = &self->cache_lock;
+      g_mutex_lock (lock);
+      GVariant *cache_hit = NULL;
+      /* Look it up, if we have a cache */
+      if (self->dirmeta_cache)
+        cache_hit = g_hash_table_lookup (self->dirmeta_cache, sha256);
+      if (cache_hit)
+        *out_variant = g_variant_ref (cache_hit);
+      g_mutex_unlock (lock);
+      if (cache_hit)
+        return TRUE;
+    }
+
   _ostree_loose_path (loose_path_buf, sha256, objtype, self->mode);
 
  if (!ot_openat_ignore_enoent (self->objects_dir_fd, loose_path_buf, &fd,
@@ -2545,6 +2566,16 @@ load_metadata_internal (OstreeRepo       *self,
                                                       data, TRUE);
               g_variant_ref_sink (ret_variant);
             }
+
+          /* Now, let's put it in the cache */
+          if (is_dirmeta_cachable)
+            {
+              GMutex *lock = &self->cache_lock;
+              g_mutex_lock (lock);
+              if (self->dirmeta_cache)
+                g_hash_table_replace (self->dirmeta_cache, g_strdup (sha256), g_variant_ref (ret_variant));
+              g_mutex_unlock (lock);
+            }
         }
       else if (out_stream)
         {
@@ -4932,3 +4963,32 @@ _ostree_repo_allocate_tmpdir (int tmpdir_dfd,
  out:
   return ret;
 }
+
+/* See ostree-repo-private.h for more information about this */
+void
+_ostree_repo_memory_cache_ref_init (OstreeRepoMemoryCacheRef *state,
+                                    OstreeRepo               *repo)
+{
+  state->repo = g_object_ref (repo);
+  GMutex *lock = &repo->cache_lock;
+  g_mutex_lock (lock);
+  repo->dirmeta_cache_refcount++;
+  if (repo->dirmeta_cache == NULL)
+    repo->dirmeta_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, (GDestroyNotify)g_variant_unref);
+  g_mutex_unlock (lock);
+
+}
+
+/* See ostree-repo-private.h for more information about this */
+void
+_ostree_repo_memory_cache_ref_destroy (OstreeRepoMemoryCacheRef *state)
+{
+  OstreeRepo *repo = state->repo;
+  GMutex *lock = &repo->cache_lock;
+  g_mutex_lock (lock);
+  repo->dirmeta_cache_refcount--;
+  if (repo->dirmeta_cache_refcount == 0)
+    g_clear_pointer (&repo->dirmeta_cache, (GDestroyNotify) g_hash_table_unref);
+  g_mutex_unlock (lock);
+  g_object_unref (repo);
+}